/* Show/hide toggle button used next to the collapsible sections. */
.my_button {
  width: 70px;
  padding: 10px;
  border: none;
  cursor: pointer;
  font-size: 14px;
  color: #3c2a34;
  background-color: #caa8b1;
}

/* All headings: centred, light text. */
h1, h2, h3, h4, h5, h6 {
  color: #e0cdd6;
  text-align: center;
}

/* Dark page background. */
body {
  background-color: #3c2a34;
}

/* Tables: horizontally centred, light text. */
table {
  color: #e0cdd6;
  margin: 1em auto;
}
# Absolute location of the project; every other path is derived from it.
project_path <- "D:/moje/projekty/chess-in-the-digital-age"
# Folder the document is knitted from; relative paths ("../pictures", ...)
# are resolved against it.
presentation_path <- paste0(project_path, "/presentation")
# Folder that holds the downloaded monthly data sets.
Dataset_path <- paste0(project_path, "/Dataset")
# Most recent month ("YYYY-MM") taken into account by the analysis.
selected_date <- "2024-04"
# Working directory for the statements that follow in this chunk.
setwd(presentation_path)
# The working directory used for all chunks is the knitr *package* option
# `root.dir`. The original call, opts_chunk$set(root = ...), only stored a
# chunk option named `root`, which knitr does not act on.
knitr::opts_knit$set(root.dir = presentation_path)
library(pander)
panderOptions('digits',7)
library(knitr)
library(rvest)
library(stringi)
library(dplyr)
library(scales)
library(ggplot2)
library(png)
library("patchwork")
library(forcats)
# Pictures that are later placed on top of the charts.
img <- readPNG("../pictures/queens_gambit.png", native = TRUE)
img2 <- readPNG("../pictures/candidates_tournament.png", native = TRUE)
img3 <- readPNG("../pictures/covid.png", native = TRUE)
img4 <- readPNG("../pictures/opening_icon.png", native = TRUE)

# Download the lichess database page and keep the table node selected by the
# XPath expression below.
url <- "https://database.lichess.org"
path <- "/html/body/div/div[2]/div/section[1]/table"
wezel <- url %>%
  read_html() %>%
  html_node(xpath = path)
// Collapsible "my_init" section: hidden when the page loads and toggled by
// the button with id "button_my_init".
const div_my_init = document.getElementById("my_init");
// Fixed: the original line used the undeclared name `div_init`, so the
// section was not hidden through the variable declared above.
div_my_init.style.display = 'none';
var button_my_init = document.getElementById("button_my_init");

/**
 * Shows the section when it is hidden and hides it otherwise; the button
 * label is switched to match ("Ukryj" while shown, "Pokaż" while hidden).
 */
function hideMy_init() {
  const isHidden = div_my_init.style.display === 'none';
  div_my_init.style.display = isHidden ? 'block' : 'none';
  button_my_init.innerHTML = isHidden ? "Ukryj" : "Pokaż";
}
# Pick the table column with the number of games played in each month.
games_count_html <- html_table(wezel)[[3]]
# Remove the thousands separators and convert to numbers. The last entry is
# dropped exactly as before (presumably it has no matching torrent link -
# TODO confirm).
my_games_count <- data.frame(
  count = as.numeric(
    stri_replace_all(games_count_html, "", regex = "\\,")
  )[-length(games_count_html)]
)

# Collect every link of the table; every second one is kept as a
# .zst.torrent link.
hyperlinks <- html_nodes(wezel, "a")
my_href <- html_attr(hyperlinks, "href")
links <- data.frame(links = paste(url, my_href, sep = "/"))
init_links_for_download <- links[seq(2, nrow(links), by = 2), 1]

# Extract the year from each link. Matching link by link keeps the result
# aligned with the links (a non-matching link yields NA) instead of handing a
# data frame to stringi, which coerces it to a single deparsed string.
my_year_from_imported_links <- data.frame(
  year = stri_match_first_regex(
    init_links_for_download, "rated_\\s*(.*?)\\s*[-]"
  )[, 2]
)
Data_games_count <- cbind(my_year_from_imported_links, my_games_count)

# For every year, the first position at which it occurs in the reversed data;
# these positions are used as axis breaks by the plots.
first_index_of_years <- match(
  unique(my_year_from_imported_links$year),
  rev(Data_games_count$year)
)
# Bar chart of the number of games per month; the x positions are reversed
# row numbers.
my_plot <- ggplot(
  data = Data_games_count,
  mapping = aes(x = rev(1:nrow(my_games_count)), y = count)
) +
  # chart type: one bar per month, filled by year
  geom_bar(mapping = aes(fill = year), stat = "identity") +
  # legend
  scale_fill_manual(
    name = "Years",
    values = unique(Data_games_count$year)
  ) +
  # axes: one tick per year, counts shown in millions
  scale_x_continuous(
    breaks = rev(first_index_of_years),
    labels = rev(unique(my_year_from_imported_links)[, ])
  ) +
  scale_y_continuous(labels = unit_format(unit = "M", scale = 1e-6)) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  # titles
  labs(
    x = "Years",
    y = "Number of chess games",
    title = "Number of chess games played on the lichess website"
  ) +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
my_plot
ggsave("../poster/Data_games_countv1.png")
# Annotated version of the chart: an arrow plus three pictures.
my_plot +
  # arrow
  geom_segment(
    mapping = aes(70, 68000000, xend = 86, yend = 62000000),
    arrow = arrow(length = unit(5, "mm")),
    linewidth = 1.2
  ) +
  # pictures
  inset_element(
    p = img,
    left = 0.02, bottom = 0.54, right = 0.42, top = 0.9
  ) +
  inset_element(
    p = img2,
    left = 0.05, bottom = 0.29, right = 0.5, top = 0.52
  ) +
  inset_element(
    p = img3,
    left = 0.44, bottom = 0.69, right = 0.65, top = 0.82
  )
ggsave("../poster/Data_games_count.png")
Gambit Królowej
# All anchors of the scraped table and their href values.
hyperlinks <- html_nodes(wezel, "a")
my_href <- html_attr(hyperlinks, "href")
# Preview: one row per anchor, one column per attribute.
html_attrs(hyperlinks) %>%
  lapply(function(attrs) {
    data.frame(as.list(attrs), stringsAsFactors = FALSE)
  }) %>%
  bind_rows() %>%
  head() %>%
  pander()
| href |
|---|
| standard/lichess_db_standard_rated_2024-05.pgn.zst |
| standard/lichess_db_standard_rated_2024-05.pgn.zst.torrent |
| standard/lichess_db_standard_rated_2024-04.pgn.zst |
| standard/lichess_db_standard_rated_2024-04.pgn.zst.torrent |
| standard/lichess_db_standard_rated_2024-03.pgn.zst |
| standard/lichess_db_standard_rated_2024-03.pgn.zst.torrent |
# Absolute links: the site address joined with every href.
links <- data.frame(links = paste(url, my_href, sep = "/"))
links %>%
  head() %>%
  pander()
# Returns the torrent links from the month `date` onwards (in the order in
# which they appear in the global `links` table).
#
# date: string such as "2024-04"; it is used as a regular expression and
#       matched against the dates found in the links.
# Returns a character vector of links, starting at the first link whose date
# matches `date`. Stops with an error when no link matches.
choose_max_date_to_scrap <- function(date) {
  # every second link is the .zst.torrent one, so only those are kept
  init_links_for_download <- links[seq(2, nrow(links), by = 2), 1]
  # Date of each link, matched link by link. The original passed a data frame
  # to stringi, which coerced it to a single deparsed string.
  full_date_from_imported_links <- stri_match_first_regex(
    init_links_for_download, "rated_\\s*(.*?)\\s*[.]"
  )[, 2]
  date_location <- which(stri_detect(full_date_from_imported_links, regex = date))
  if (length(date_location) == 0L) {
    # the original failed here with the opaque "argument of length 0"
    stop("no download link found for date '", date, "'", call. = FALSE)
  }
  # as before, only the first matching position is used
  init_links_for_download[date_location[1]:length(init_links_for_download)]
}
# Links to download, starting at the selected month.
links_for_download <- choose_max_date_to_scrap(selected_date)
pander(data.frame(prepared_links = head(links_for_download)))
# Date ("YYYY-MM") of each link. Matching link by link keeps the dates aligned
# with the links; the original passed a data frame to stringi, which coerced
# it to a single deparsed string.
full_date_from_imported_links <- stri_match_first_regex(
  links_for_download, "rated_\\s*(.*?)\\s*[.]"
)[, 2]
pander(head(data.frame(full_date_from_imported_links)))
| full_date_from_imported_links |
|---|
| 2024-04 |
| 2024-03 |
| 2024-02 |
| 2024-01 |
| 2023-12 |
| 2023-11 |
# Year of each link, matched link by link so that it stays aligned with
# `links_for_download` (the original passed a data frame to stringi, which
# coerced it to a single deparsed string).
year_from_imported_links <- stri_match_first_regex(
  links_for_download, "rated_\\s*(.*?)\\s*[-]"
)[, 2]
pander(head(data.frame(year_from_imported_links)))
| year_from_imported_links |
|---|
| 2024 |
| 2024 |
| 2024 |
| 2024 |
| 2023 |
| 2023 |
# Month of each link: the text between the first "-" and the following ".",
# matched link by link so that it stays aligned with `links_for_download`
# (the original passed a data frame to stringi, which coerced it to a single
# deparsed string).
month_from_imported_links <- stri_match_first_regex(
  links_for_download, "-\\s*(.*?)\\s*[.]"
)[, 2]
pander(head(data.frame(month_from_imported_links)))
| month_from_imported_links |
|---|
| 04 |
| 03 |
| 02 |
| 01 |
| 12 |
| 11 |
# Abbreviated English month name for every month number.
my_month_names <- month.abb[as.integer(month_from_imported_links)]
data.frame(my_month_names) %>%
  head() %>%
  pander()
| my_month_names |
|---|
| Apr |
| Mar |
| Feb |
| Jan |
| Dec |
| Nov |
# One folder per year below the data set folder.
my_paths_year <- unique(paste0(Dataset_path, "/", year_from_imported_links))
data.frame(my_paths_year) %>%
  pander()
| my_paths_year |
|---|
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2024 |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2023 |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2022 |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2021 |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2020 |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2019 |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2018 |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2017 |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2016 |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2015 |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2014 |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2013 |
# Creates every folder listed in the global `my_paths_year`.
#
# Folders that already exist are left alone, missing parent folders are
# created as well, and an empty `my_paths_year` is a no-op (the original
# `1:length()` loop called dir.create(NA) in that case).
# Returns NULL invisibly.
create_folders_year <- function() {
  missing_dirs <- my_paths_year[!dir.exists(my_paths_year)]
  for (year_dir in missing_dirs) {
    dir.create(year_dir, recursive = TRUE)
  }
  invisible(NULL)
}
# One folder per month, named "<MM>. <Mon>", inside the folder of its year.
my_paths_month <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names
)
data.frame(my_paths_month) %>%
  head() %>%
  pander()
| my_paths_month |
|---|
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/04. Apr |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/03. Mar |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/02. Feb |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/01. Jan |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/12. Dec |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/11. Nov |
# Creates every folder listed in the global `my_paths_month`.
#
# Folders that already exist are left alone, missing parent folders are
# created as well, and an empty `my_paths_month` is a no-op (the original
# `1:length()` loop called dir.create(NA) in that case).
# Returns NULL invisibly.
create_folders_month <- function() {
  missing_dirs <- my_paths_month[!dir.exists(my_paths_month)]
  for (month_dir in missing_dirs) {
    dir.create(month_dir, recursive = TRUE)
  }
  invisible(NULL)
}
# Destination file of every torrent, inside the folder of its month.
my_paths <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/lichess_db_standard_rated_", full_date_from_imported_links,
  ".pgn.zst.torrent"
)
data.frame(my_paths) %>%
  head() %>%
  pander()
| my_paths |
|---|
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/04. Apr/lichess_db_standard_rated_2024-04.pgn.zst.torrent |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/03. Mar/lichess_db_standard_rated_2024-03.pgn.zst.torrent |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/02. Feb/lichess_db_standard_rated_2024-02.pgn.zst.torrent |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/01. Jan/lichess_db_standard_rated_2024-01.pgn.zst.torrent |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/12. Dec/lichess_db_standard_rated_2023-12.pgn.zst.torrent |
| D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/11. Nov/lichess_db_standard_rated_2023-11.pgn.zst.torrent |
# Creates the year and month folders and downloads every torrent file in
# `links_for_download` to the matching entry of `my_paths`, pausing 0.2 s
# after each download.
downloading <- function() {
  create_folders_year()
  create_folders_month()
  for (j in seq_along(my_paths)) {
    download.file(links_for_download[j], my_paths[j], mode = "wb")
    # Fixed: the pause used to be passed as the third positional argument of
    # download.file(), i.e. as its `method` argument, instead of being a
    # statement of its own.
    Sys.sleep(0.2)
  }
}
downloading()
// Collapsible "tworzenie-struktury" section: hidden on load, toggled by the
// button with id "my_button".
const my_div1 = document.getElementById("tworzenie-struktury");
my_div1.style.display = 'none';
var button = document.getElementById("my_button");

/**
 * Shows the section when it is hidden and hides it otherwise, updating the
 * button label to match.
 */
function myFunction() {
  const isHidden = my_div1.style.display === 'none';
  my_div1.style.display = isHidden ? 'block' : 'none';
  button.innerHTML = isHidden ? "Ukryj" : "Pokaż";
}
# Returns the game counts (third column of the scraped table, still formatted
# as text) from the month `date` onwards.
#
# date: string such as "2024-04"; it is used as a regular expression and
#       matched against the dates found in the torrent links.
# Stops with an error when no link matches `date`.
choose_max_date_to_scrap_game_count <- function(date) {
  # table column to analyse
  my_games_count <- html_table(wezel)[[3]]
  # every second link is kept as a .zst.torrent link
  init_links_for_download <- links[seq(2, nrow(links), by = 2), 1]
  # Date of each link, matched link by link. The original passed a data frame
  # to stringi, which coerced it to a single deparsed string.
  full_date_from_imported_links <- stri_match_first_regex(
    init_links_for_download, "rated_\\s*(.*?)\\s*[.]"
  )[, 2]
  # position of the selected date
  date_location <- which(stri_detect(full_date_from_imported_links, regex = date))
  if (length(date_location) == 0L) {
    # the original failed here with the opaque "argument of length 0"
    stop("no download link found for date '", date, "'", call. = FALSE)
  }
  # game counts from the selected date onwards; as before, only the first
  # matching position is used
  my_games_count[date_location[1]:length(init_links_for_download)]
}
games_count <- choose_max_date_to_scrap_game_count(selected_date)
pander(head(data.frame(games_count)))
| games_count |
|---|
| 91,377,787 |
| 95,804,114 |
| 91,567,975 |
| 98,994,760 |
| 96,909,211 |
| 92,389,636 |
# Trims the decompressed monthly PGN file of `date` to a fraction of its size
# and writes the result next to it as Data_<date>.pgn, together with an empty
# marker file "ograniczono_dane.txt".
#
# date:      month to process, e.g. "2021-12"; used as a regular expression
#            against the global `full_date_from_imported_links`.
# data_size: fraction used to scale the number of lines that are read.
# Reads the globals games_count, Dataset_path, year_from_imported_links,
# month_from_imported_links, my_month_names and full_date_from_imported_links.
# Returns the last rows of the written file, read back from disk.
preparing_month_dataset = function(date, data_size = 0.001){
# find the index of the selected date
searching_location = which(stri_detect(full_date_from_imported_links, regex = date) == TRUE)
# number of lines to read: game count of the selected month * 18 * data_size
# (18 is presumably the approximate number of lines per game - TODO confirm)
decreasing_game_number <- round((as.numeric(stri_replace_all(games_count,"",regex = "\\,"))*18*data_size)[searching_location])
# path of the decompressed .pgn file of the selected month
pgn_file_to_read = paste(Dataset_path,"/",year_from_imported_links,"/",month_from_imported_links,". ",my_month_names,"/lichess_db_standard_rated_",full_date_from_imported_links,".pgn", sep="")[searching_location]
# read only the first `decreasing_game_number` lines of the file
my_pgn <- read.table(pgn_file_to_read,
quote="", sep="\n", stringsAsFactors=FALSE, nrows = decreasing_game_number)
# tag names of the last 23 lines that were read (rows n-22 .. n)
colnms <- sub("\\[(\\w+).+", "\\1", my_pgn[(decreasing_game_number-22):decreasing_game_number,1])
# positions (within those lines) of "Event" tags, i.e. where a new game starts
Event_location = which(stri_detect(colnms, regex = "Event") == TRUE)
# keep everything up to the line just before that "Event" line, so the output
# does not end in the middle of a game
# NOTE(review): if several "Event" lines fall into the window, Event_location
# has more than one element - confirm which one is meant to be used
my_pgn2 = my_pgn[1:(decreasing_game_number-22+Event_location-2),]
# write the trimmed data set to Data_<date>.pgn
pgn_file_to_write = paste(Dataset_path,"/",year_from_imported_links,"/",month_from_imported_links,". ",my_month_names,"/Data_",date,".pgn", sep="")[searching_location]
write.table(my_pgn2,pgn_file_to_write,col.names = FALSE,row.names = FALSE,quote = FALSE)
# same path as pgn_file_to_write; used below to read the result back
test_path = paste(Dataset_path,"/",year_from_imported_links,"/",month_from_imported_links,". ",my_month_names,"/Data_",date,".pgn", sep="")[searching_location]
# marker file recording that this month has been trimmed
file_txt_for_info = paste(Dataset_path,"/",year_from_imported_links,"/",month_from_imported_links,". ",my_month_names,"/ograniczono_dane.txt", sep="")[searching_location]
write.table(" ",file_txt_for_info)
# read the written file back so that its last lines can be inspected
testing_last_char <- read.table(test_path,
quote="", sep="\n", stringsAsFactors=FALSE)
return(tail(testing_last_char))
}
# Deletes the original .pgn.zst and .pgn files of the month `date` and leaves
# a marker file "usunieto_oryginal.txt" in the folder of that month.
#
# date: month to clean up, e.g. "2013-01"; used as a regular expression
#       against the global `full_date_from_imported_links`.
removing_oryginal_dataset <- function(date) {
  # index of the selected date
  idx <- which(stri_detect(full_date_from_imported_links, regex = date))
  # folder of every month and the common stem of the original file names
  month_dirs <- paste0(
    Dataset_path, "/", year_from_imported_links, "/",
    month_from_imported_links, ". ", my_month_names
  )
  file_stems <- paste0(
    month_dirs, "/lichess_db_standard_rated_", full_date_from_imported_links
  )
  # files to delete and the marker file recording the deletion
  zst_file <- paste0(file_stems, ".pgn.zst")[idx]
  pgn_file <- paste0(file_stems, ".pgn")[idx]
  marker_file <- paste0(month_dirs, "/usunieto_oryginal.txt")[idx]
  file.remove(zst_file)
  file.remove(pgn_file)
  write.table(" ", marker_file)
}
preparing_month_dataset("2021-12")
removing_oryginal_dataset("2013-01")
// Collapsible "przygotowywanie-zbiorow" section: hidden on load, toggled by
// the button with id "my_button2".
const my_div2 = document.getElementById("przygotowywanie-zbiorow");
my_div2.style.display = 'none';
var button2 = document.getElementById("my_button2");

/**
 * Shows the section when it is hidden and hides it otherwise, updating the
 * button label to match.
 */
function myFunction2() {
  const isHidden = my_div2.style.display === 'none';
  my_div2.style.display = isHidden ? 'block' : 'none';
  button2.innerHTML = isHidden ? "Ukryj kod" : "Pokaż kod";
}
# command that counts how often each tag name occurs
# zmienne = fct_count(fct_infreq(sub("\\[(\\w+).+", "\\1", pgn[1:nrow(pgn.df),1])))
files_to_read <- paste(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/Data_", full_date_from_imported_links, ".pgn",
  sep = ""
)
# Share (in %) of games per month whose "Opening" tag mentions the Queen's
# Gambit. The result vector is preallocated and the loop uses seq_along(), so
# an empty `files_to_read` no longer iterates over c(1, 0).
Percent_games_count <- numeric(length(files_to_read))
for (i in seq_along(files_to_read)) {
  pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)
  # tag name and quoted tag value of every line
  tag_names <- sub("\\[(\\w+).+", "\\1", pgn[, 1])
  tag_values <- sub("\\[\\w+ \\\"(.+)\\\"\\]", "\\1", pgn[, 1])
  openings <- tag_values[which(tag_names == "Opening")]
  All_games_count <- length(openings)
  Queens_Gambit_games_count <- sum(
    stri_detect(openings, regex = "Queen's Gambit"),
    na.rm = TRUE
  )
  Percent_games_count[i] <- (Queens_Gambit_games_count / All_games_count) * 100
}
my_Percent_games_count <- data.frame(count = Percent_games_count)
Data_Queens_gambit_games_count <- cbind(
  data.frame(year = year_from_imported_links),
  my_Percent_games_count
)
ggplot(
  Data_Queens_gambit_games_count,
  aes(x = rev(1:length(games_count)), y = count)
) +
  # chart type
  geom_bar(stat = "identity", aes(fill = year)) +
  # titles
  labs(
    x = "Years",
    y = "Number of chess games ( % ) ",
    title = "Number of 'Queen's Gambit' openings played"
  ) +
  # legend
  scale_fill_manual(
    name = "Years",
    values = unique(Data_Queens_gambit_games_count$year)
  ) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  scale_x_continuous(
    breaks = c(rev(first_index_of_years)),
    labels = c(rev(unique(year_from_imported_links)))
  ) +
  scale_y_continuous(labels = unit_format(unit = "%", scale = 1)) +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave("../poster/Data_Queens_gambit_games_count.png")
files_to_read <- paste(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/Data_", full_date_from_imported_links, ".pgn",
  sep = ""
)
# Share (in %) of games per month in which the "WhiteElo" tag is below 1200.
novice_games_count <- numeric(length(files_to_read))
for (i in seq_along(files_to_read)) {
  pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)
  # tag name and quoted tag value of every line
  tag_names <- sub("\\[(\\w+).+", "\\1", pgn[, 1])
  tag_values <- sub("\\[\\w+ \\\"(.+)\\\"\\]", "\\1", pgn[, 1])
  # ratings that are not numeric become NA and are not counted as novice
  white_elo <- as.numeric(tag_values[which(tag_names == "WhiteElo")])
  All_games_count <- length(white_elo)
  my_novice_games_count <- sum(white_elo < 1200, na.rm = TRUE)
  novice_games_count[i] <- (my_novice_games_count / All_games_count) * 100
}
# Fixed: the original read `Percent_novice_games_count`, which is never
# defined; the loop above stores its results in `novice_games_count`.
my_Percent_novice_games_count <- data.frame(count = novice_games_count)
Data_novice_games_count <- cbind(
  data.frame(year = year_from_imported_links),
  my_Percent_novice_games_count
)
ggplot(
  Data_novice_games_count,
  aes(x = rev(1:length(games_count)), y = count)
) +
  # chart type
  geom_bar(stat = "identity", aes(fill = year)) +
  # titles
  labs(
    x = "Years",
    y = "Number of chess games ( % ) ",
    title = "Number of novice games (rank < 1200) "
  ) +
  # legend
  scale_fill_manual(
    name = "Years",
    values = unique(Data_novice_games_count$year)
  ) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  scale_x_continuous(
    breaks = c(rev(first_index_of_years)),
    labels = c(rev(unique(year_from_imported_links)))
  ) +
  scale_y_continuous(labels = unit_format(unit = "%", scale = 1)) +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  # arrow
  geom_segment(
    aes(58, 10.68, xend = 95, yend = 10),
    linewidth = 1.2,
    arrow = arrow(length = unit(5, "mm"))
  ) +
  # picture
  inset_element(
    p = img,
    left = 0.02,
    bottom = 0.54,
    right = 0.42,
    top = 0.9
  )
ggsave("../poster/Data_novice_games_count.png")
files_to_read <- paste(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/Data_", full_date_from_imported_links, ".pgn",
  sep = ""
)
# Share (in %) of games per month in which the "WhiteElo" tag is above 1800.
pro_games_count <- numeric(length(files_to_read))
for (i in seq_along(files_to_read)) {
  pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)
  # tag name and quoted tag value of every line
  tag_names <- sub("\\[(\\w+).+", "\\1", pgn[, 1])
  tag_values <- sub("\\[\\w+ \\\"(.+)\\\"\\]", "\\1", pgn[, 1])
  # ratings that are not numeric become NA and are not counted as pro
  white_elo <- as.numeric(tag_values[which(tag_names == "WhiteElo")])
  All_games_count_v2 <- length(white_elo)
  my_pro_games_count <- sum(white_elo > 1800, na.rm = TRUE)
  pro_games_count[i] <- (my_pro_games_count / All_games_count_v2) * 100
}
# Fixed: the original read `Percent_pro_games_count`, which is never defined;
# the loop above stores its results in `pro_games_count`.
my_Percent_pro_games_count <- data.frame(count = pro_games_count)
Data_pro_games_count <- cbind(
  data.frame(year = year_from_imported_links),
  my_Percent_pro_games_count
)
ggplot(
  Data_pro_games_count,
  aes(x = rev(1:length(games_count)), y = count)
) +
  # chart type
  geom_bar(stat = "identity", aes(fill = year)) +
  # titles
  labs(
    x = "Years",
    y = "Number of chess games ( % ) ",
    title = "Number of pro games (rank > 1800) "
  ) +
  # legend
  scale_fill_manual(
    name = "Years",
    values = unique(Data_pro_games_count$year)
  ) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  scale_x_continuous(
    breaks = c(rev(first_index_of_years)),
    labels = c(rev(unique(year_from_imported_links)))
  ) +
  scale_y_continuous(labels = unit_format(unit = "%", scale = 1)) +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  # arrow
  geom_segment(
    aes(65, 38, xend = 86, yend = 37),
    linewidth = 1.2,
    arrow = arrow(length = unit(5, "mm"))
  ) +
  # picture
  inset_element(
    p = img3,
    left = 0.39,
    bottom = 0.84,
    right = 0.6,
    top = 0.97
  )
ggsave("../poster/Data_pro_games_count.png")
# Exploratory search, in the first data file only, for games whose first five
# White moves are the set e3, d3, Ne2, Nd2, Ng3 (in any order).
# NOTE(review): the outer `for(i in 1:1){` loop is not closed within this
# section and the inner loop reuses `i` - confirm against the original chunk.
# filtred_two_moves_to_compare = two_moves_to_compare[c(5,6) + rep(seq(0, length(two_moves_to_compare), 6), each = 2)]
# files_to_read = paste(Dataset_path,"/",year_from_imported_links,"/",month_from_imported_links,". ",my_month_names,"/Data_",full_date_from_imported_links,".pgn", sep="")
my_opening=c()
for(i in 1:1){
pgn <- read.table(files_to_read[i], quote="", sep="\n", stringsAsFactors=FALSE)
# keep the lines for which the substitution does not yield an empty string
# (presumably the move lines, as opposed to the [Tag "..."] lines - TODO confirm)
moves = which(sub("\\[(\\b+).+", "\\1", pgn[1:nrow(pgn),1]) != "")
head(moves)
# text from "1. " up to " 6. " of every selected line, flattened to one vector
my_five_moves = unlist(stri_match_all(pgn[moves,], regex = "1. \\s*(.*?)\\s* 6. "))
head(my_five_moves)
# keep every second element of the flattened matches, starting with the first
filtred_five_moves = my_five_moves[seq(1,length(my_five_moves), by=2)]
head(filtred_five_moves)
# indices of the games whose moves equal the searched set
my_opening_index=c()
for(i in 1:length(filtred_five_moves)){
# third column of the match matrix: the word that follows "<digit><any char> "
five_moves_to_compare = stri_match_all(filtred_five_moves, regex = "(\\d. (\\w+))")[[i]][,3]
# record the index when the move sets are equal, otherwise go to the next game
ifelse(setequal(five_moves_to_compare,c("e3","d3","Ne2","Nd2","Ng3"))== TRUE,
(my_opening_index = append(my_opening_index,i)),
next)
}
// Collapsible "searching_v1_div" section: hidden on load, toggled by the
// button with id "searching_v1".
const searching_v1_div = document.getElementById("searching_v1_div");
searching_v1_div.style.display = 'none';
var searching_v1 = document.getElementById("searching_v1");

/**
 * Shows the section when it is hidden and hides it otherwise, updating the
 * button label to match.
 */
function searching() {
  const isHidden = searching_v1_div.style.display === 'none';
  searching_v1_div.style.display = isHidden ? 'block' : 'none';
  searching_v1.innerHTML = isHidden ? "Ukryj kod" : "Pokaż kod";
}
#files_to_read = paste(Dataset_path,"/",year_from_imported_links,"/",month_from_imported_links,". ",my_month_names,"/Data_",full_date_from_imported_links,".pgn", sep="")
# Computes, for every file in the global `files_to_read`, the share (in %) of
# games in which White opens with the given sequence of moves.
#
# searching_moves: character vector; element j is compared with the word that
#                  follows move number j in the move line of a game. An empty
#                  vector matches every game.
# Returns a numeric vector with one percentage per file (length zero when
# `files_to_read` is empty).
search_by_moves <- function(searching_moves) {
  opening_count <- numeric(length(files_to_read))
  for (i in seq_along(files_to_read)) {
    pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)
    pgn_lines <- pgn[, 1]
    # lines for which the substitution does not yield an empty string; these
    # are treated as the move lines, one per game
    moves <- which(sub("\\[(\\b+).+", "\\1", pgn_lines) != "")
    All_games_count <- length(moves)
    # narrow the candidate games down move by move
    for (j in seq_along(searching_moves)) {
      my_regex <- paste("(.*?)\\s*", j, ". (\\w+).+", sep = "")
      moves_to_compare <- sub(my_regex, "\\2", pgn_lines[moves])
      moves <- moves[which(moves_to_compare == searching_moves[j])]
    }
    opening_count[i] <- (length(moves) / All_games_count) * 100
  }
  opening_count
}
# Fixed: the result of search_by_moves() used to be discarded, so the
# `opening_count` read on the next line was never defined at this level.
opening_count <- search_by_moves(c("e3", "d3", "Ne2", "Nd2", "Ng3"))
my_opening_count <- data.frame(count = opening_count)
Data_opening_count <- cbind(
  data.frame(year = year_from_imported_links),
  my_opening_count
)
ggplot(
  Data_opening_count,
  aes(x = rev(1:length(games_count)), y = count)
) +
  # chart type
  geom_bar(stat = "identity", aes(fill = year)) +
  # titles
  labs(
    x = "Years",
    y = "Number of chess games",
    title = "Number of 'Cow' move sequences played"
  ) +
  # legend
  scale_fill_manual(
    name = "Years",
    values = unique(Data_opening_count$year)
  ) +
  expand_limits(x = c(0, NA), y = c(0, 10)) +
  scale_x_continuous(
    breaks = c(rev(first_index_of_years)),
    labels = c(rev(unique(year_from_imported_links)))
  ) +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  # arrow
  geom_segment(
    aes(80, 3.2, xend = 123, yend = 2.2),
    linewidth = 1.2,
    arrow = arrow(length = unit(5, "mm"))
  ) +
  # picture
  inset_element(
    p = img4,
    left = 0.42,
    bottom = 0.34,
    right = 0.75,
    top = 0.51
  )
ggsave("../poster/Data_opening_count.png")
files_to_read <- paste(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/Data_", full_date_from_imported_links, ".pgn",
  sep = ""
)
# Lists the games in which White opens with the given sequence of moves.
#
# searching_moves:  character vector; element j is compared with the word that
#                   follows move number j in the move line of a game.
# months_to_search: number of files (taken from the start of the global
#                   `files_to_read`) to search.
# Returns a data frame with one column `moves`; each row holds the
# renumbered, interleaved White and Black moves of one matching game. When no
# game matches, a data frame with zero rows is returned (the original failed
# in that case).
search_games_by_moves <- function(searching_moves, months_to_search) {
  matched_per_month <- vector("list", months_to_search)
  for (i in seq_len(months_to_search)) {
    pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)
    pgn_lines <- pgn[, 1]
    # lines for which the substitution does not yield an empty string; these
    # are treated as the move lines, one per game
    moves <- which(sub("\\[(\\b+).+", "\\1", pgn_lines) != "")
    # narrow the candidate games down move by move
    for (j in seq_along(searching_moves)) {
      my_regex <- paste("(.*?)\\s*", j, ". (\\w+).+", sep = "")
      moves_to_compare <- sub(my_regex, "\\2", pgn_lines[moves])
      moves <- moves[which(moves_to_compare == searching_moves[j])]
    }
    matched_per_month[[i]] <- pgn_lines[moves]
  }
  matched_games <- as.character(unlist(matched_per_month))

  # Match all games once; the original repeated both matches over every game
  # for each single game.
  white_matches <- stri_match_all(matched_games, regex = "(\\d. (\\w+))")
  black_matches <- stri_match_all(matched_games, regex = "(\\d.\\.. (\\w+))")

  game_moves <- vapply(seq_along(matched_games), function(z) {
    moves_detect_whiteV1 <- white_matches[[z]][, 3]
    moves_detect_white <- paste(seq_along(moves_detect_whiteV1), ". ", moves_detect_whiteV1, sep = "")
    moves_detect_blackV1 <- black_matches[[z]][, 3]
    moves_detect_black <- paste(seq_along(moves_detect_blackV1), "... ", moves_detect_blackV1, sep = "")
    # interleave: White moves on odd positions, Black moves on even positions
    opening_moves <- character(length(moves_detect_white) + length(moves_detect_black))
    opening_moves[seq(1, length(opening_moves), by = 2)] <- moves_detect_white
    opening_moves[seq(2, length(opening_moves), by = 2)] <- moves_detect_black
    paste(opening_moves, collapse = " ")
  }, character(1))

  data.frame(moves = game_moves, stringsAsFactors = FALSE)
}
pander(head(search_games_by_moves(c("Nf3", "Ng1"), 1)))
| moves |
|---|
| 1. Nf3 1… d5 2. Ng1 2… Nf6 3. Nf3 3… Bf5 4. Ng1 4… e6 5. e3 5… Be7 6. d4 6… O 7. Nf3 7… c5 8. Bd3 8… Nc6 9. Bxf5 9… exf5 10. O 10… Ne4 11. c3 11… Re8 12. Nbd2 12… a5 13. Nb3 13… Qb6 14. dxc5 14… Nxc5 15. Nxc5 15… Qxc5 16. Nd4 16… Rad8 17. Nxf5 17… Bf8 18. Nd4 18… g6 19. Nxc6 19… bxc6 20. Qf3 20… Bg7 21. Re1 21… a4 22. a3 22… Rb8 23. g3 23… h5 24. e4 24… Rxe4 25. Rxe4 25… dxe4 26. Qxe4 26… Rd8 27. Be3 27… Qb5 28. Qb4 28… Qd5 29. Qxa4 29… Be5 30. Bd4 30… Bxd4 31. cxd4 31… Qxd4 32. Qxd4 32… Rxd4 33. Rc1 33… Rd2 34. b4 34… Ra2 35. Rxc6 35… Rxa3 36. b5 36… Rb3 37. b6 37… Kg7 38. Kg2 38… Rb1 39. Kf3 39… Rb3 40. Ke4 40… Rb4 41. Kd5 41… Rb1 42. Kd6 42… Rd1 43. Kc7 43… Rb1 |
| 1. Nf3 1… c5 2. Ng1 2… g6 3. Nf3 3… Bg7 4. Ng1 4… Nc6 5. Nf3 5… d5 6. Ng1 6… Nf6 7. Nf3 7… O 8. d3 8… e5 9. g3 9… Qe7 10. Bg2 10… Be6 11. Nbd2 11… a5 12. c4 12… d4 13. b3 13… Rab8 14. O 14… b5 15. Re1 15… bxc4 16. Nxc4 16… Rb4 17. Ba3 17… Rb5 18. e3 18… Bxc4 19. dxc4 19… Rb6 20. exd4 20… cxd4 21. Bxe7 21… Nxe7 22. Nxe5 22… Ne4 23. Rxe4 |
| 1. Nf3 1… e6 2. Ng1 2… Bc5 3. Nf3 3… d6 4. e3 4… Nf6 5. d4 5… Bb6 6. Bd2 6… c5 7. Nc3 7… cxd4 8. exd4 8… Ng4 9. h3 9… Nf6 10. Qe2 10… Ba5 11. O 11… Qc7 12. Qe3 12… d5 13. Ne5 13… Ne4 14. Nb5 14… Bxd2 15. Rxd2 15… Qa5 16. Nd6 16… Nxd6 17. Rd3 17… Nc4 18. Qe1 18… Qb5 19. Rb3 19… Qa4 20. Bxc4 20… dxc4 21. Rb4 21… Qxa2 22. Kd2 22… O 23. Qe3 23… Rd8 24. h4 24… Qa5 25. c3 25… Nc6 26. f4 26… Nxb4 27. cxb4 27… Qxb4 |
| 1. Nf3 1… d5 2. Ng1 2… e5 3. g3 3… e4 4. Bg2 4… f5 5. e3 5… f4 6. d3 6… f3 7. Bf1 7… c6 8. dxe4 8… dxe4 9. Qxd8 9… Kxd8 10. Nc3 10… Nf6 11. Bd2 11… Bf5 12. Bc4 12… Nbd7 13. O 13… b5 14. Bb3 14… Kc7 15. a3 15… Nc5 16. Be1 16… Nxb3 17. cxb3 17… a6 18. Kb1 18… a5 19. Na2 19… b4 20. a4 20… Be7 21. Bd2 21… Rhd8 22. Bc1 22… Rxd1 23. h3 23… h5 24. h4 24… Bg4 25. Kc2 25… Rf1 26. Kd2 26… Rxf2 27. Ke1 27… Rg2 28. Nh3 28… Nd5 29. Nf4 29… Nxf4 30. gxf4 30… Bc5 |
| 1. Nf3 1… c5 2. Ng1 2… d5 3. g3 3… d4 4. Bg2 4… Bg4 5. d3 5… Bxe2 6. Qxe2 6… Nc6 7. Nf3 7… Nb4 8. O 8… Nf6 9. a3 9… Nc6 10. Qd2 10… e6 11. Qd1 11… Bd6 12. Bg5 12… O 13. h4 13… h6 14. Bxf6 14… Qxf6 15. Nbd2 15… Qg6 16. Ne4 16… Bc7 17. c4 17… dxc3 18. bxc3 18… Rad8 19. Qe2 19… f5 20. Nxc5 20… Bxg3 21. fxg3 21… Qxg3 22. Qxe6 22… Kh7 23. d4 23… Rfe8 24. Qc4 24… Rf8 25. Ne6 25… g5 26. Nxf8 26… Rxf8 27. Ne5 27… gxh4 28. Nxc6 28… bxc6 29. Rae1 29… h3 30. Re7 30… Kg6 31. Qe6 31… Kh5 32. Kh1 32… Qxg2 |
| 1. Nf3 1… e6 2. Ng1 2… Be7 3. f4 3… d6 4. Nf3 4… c6 5. e3 5… b6 6. d4 6… Bb7 7. Bd3 7… Nd7 8. O 8… Ngf6 9. c3 9… O 10. Bc2 10… c5 11. Nbd2 11… cxd4 12. exd4 12… e5 13. Nb1 13… exf4 14. Bxf4 14… Nd5 15. Nbd2 15… Nxf4 16. Ne4 16… Nd5 17. Qd3 17… N5f6 18. Nxf6 18… Nxf6 19. Ng5 19… g6 20. Rxf6 20… Bxf6 21. Nf3 21… Re8 22. Rf1 22… Be4 23. Qd1 23… Bxc2 24. Qxc2 24… d5 25. Qf2 25… b5 26. Qg3 26… Bg7 27. Qh3 27… Qb6 28. Ng5 28… Bxd4 29. Kh1 29… h6 30. Qf3 30… Bg7 31. Qxf7 31… Kh8 32. Ne6 32… Rxe6 33. Qf8 33… Rxf8 34. Rxf8 34… Bxf8 35. h4 |